We’re curious to see whether there is a relationship between longest leaf length and total shoot biomass. Data available: in 2017, 21 sites were surveyed for seagrass. At each site, eight 0.25 m^2 quadrats were surveyed, and from each quadrat 5 entire shoots (i.e. multiple leaves) were removed and brought back to the lab to be measured (in cm). Each shoot (n = 840) was weighed to get total fresh and dry biomass (in grams), and each leaf on the shoot had its length (cm) and width (cm) measured.

We are interested in the length of the longest leaf and the dry biomass.

If there is a strong relationship, then I would like to use the linear model from 2017 to predict biomass values based on longest leaf length in 2019.

Data import

# Read the cleaned seagrass biometrics table (one row per shoot) into a
# data frame.
eelgrass <- read.csv(
  file = "../APECS Master repository/APECS Master repo/ALL_DATA/seagrass_biometrics_CLEAN.csv"
)

Libraries

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.4.0     ✔ purrr   0.3.4
## ✔ tibble  3.1.8     ✔ dplyr   1.0.7
## ✔ tidyr   1.1.4     ✔ stringr 1.4.0
## ✔ readr   2.1.1     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select

Data Cleaning

# Inspect the column names and types of the raw table before extracting the
# variables we're interested in.
eelgrass %>% str()
## 'data.frame':    840 obs. of  71 variables:
##  $ site                  : chr  "2017_H_01" "2017_H_01" "2017_H_01" "2017_H_01" ...
##  $ collection_date       : chr  "29/4/17" "29/4/17" "29/4/17" "29/4/17" ...
##  $ YYYYMMDD              : int  20170429 20170429 20170429 20170429 20170429 20170429 20170429 20170429 20170429 20170429 ...
##  $ quadrat               : int  1 1 1 1 1 2 2 2 2 2 ...
##  $ plant                 : int  1 2 3 4 5 1 2 3 4 5 ...
##  $ rhi_length            : num  5 5 5 4.6 5 4.6 4 5 5 3.6 ...
##  $ node1                 : num  0.5 0.7 0.3 0.4 0.2 1.2 0.3 0.3 0.5 0.4 ...
##  $ node2                 : num  0.4 0.4 0.3 0.4 0.2 0.6 0.3 0.2 0.6 0.6 ...
##  $ node3                 : num  0.3 0.3 0.4 0.3 0.3 0.4 0.2 0.2 0.4 0.4 ...
##  $ node4                 : num  0.2 0.3 0.3 0.4 0.2 0.3 0.2 0.2 0.3 0.2 ...
##  $ node5                 : num  0.2 0.3 0.2 0.2 0.3 0.3 0.2 0.3 0.2 0.2 ...
##  $ leaf_length1          : num  22.1 25.2 16.4 21 13 32.4 23 16.5 26 26.5 ...
##  $ leaf_length2          : num  18.9 17.6 9.8 18 13.6 22.1 12.3 21.1 21.1 18.1 ...
##  $ leaf_length3          : num  14.5 22 14.1 8.5 9.6 25 16.1 22.7 20.2 25.7 ...
##  $ leaf_length4          : num  6.4 14.9 9.4 17 NA 27.3 9.3 8.7 10.2 11.3 ...
##  $ leaf_length5          : num  13.3 8 NA 16.5 NA 12.4 15.5 15.5 17.5 22.7 ...
##  $ leaf_length6          : num  NA 3.6 NA NA NA 9.2 NA NA NA NA ...
##  $ leaf_length7          : num  NA NA NA NA NA 6 NA NA NA NA ...
##  $ leaf_length8          : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ leaf_length9          : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ leaf_length10         : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ leaf_width1           : num  0.3 0.3 0.2 0.3 0.2 0.4 0.2 0.2 0.3 0.4 ...
##  $ leaf_width2           : num  0.2 0.3 0.2 0.2 0.2 0.4 0.2 0.2 0.2 0.3 ...
##  $ leaf_width3           : num  0.2 0.3 0.2 0.2 0.2 0.4 0.2 0.2 0.3 0.3 ...
##  $ leaf_width4           : num  0.2 0.3 0.2 0.2 NA 0.4 0.2 0.2 0.3 0.3 ...
##  $ leaf_width5           : num  0.2 0.3 NA 0.2 NA 0.2 0.3 0.3 0.2 0.4 ...
##  $ leaf_width6           : num  NA 0.2 NA NA NA 0.2 NA NA NA NA ...
##  $ leaf_width7           : num  NA NA NA NA NA 0.3 NA NA NA NA ...
##  $ leaf_width8           : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ leaf_width9           : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ leaf_width10          : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ pad_mass_g            : num  0.543 0.529 0.536 0.491 0.592 ...
##  $ pad_epiphyte_mass_g   : num  0.549 0.53 0.543 0.494 0.596 ...
##  $ rhi_mass_fw           : num  0.227 0.488 0.346 0.397 0.455 ...
##  $ rhi_foil              : num  0.658 0.427 0.45 0.376 0.55 ...
##  $ rhi_foil_dw_g         : num  0.684 0.478 0.49 0.411 0.586 ...
##  $ shoot_mass_fw         : num  0.269 0.412 0.165 0.321 0.116 ...
##  $ shoot_foil            : num  0.905 0.728 0.758 0.611 0.708 ...
##  $ shoot_foil_dw         : num  0.944 0.791 0.783 0.66 0.724 ...
##  $ xs_shoot_mass_fw      : num  4.77 NA NA NA NA ...
##  $ xs_shoot_foil         : num  0.789 NA NA NA NA ...
##  $ xs_shoot_foil_dw      : num  1.38 NA NA NA NA ...
##  $ xs_pad_mass_g         : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ xs_epiphyte_pad_mass_g: num  NA NA NA NA NA NA NA NA NA NA ...
##  $ iso_count             : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ iso_fw                : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ iso_foil              : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ iso_foil_dw           : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ gamm_amph_count       : int  3 NA NA NA NA 21 NA NA NA NA ...
##  $ gamm_amph_fw          : num  0.0059 NA NA NA NA 0.036 NA NA NA NA ...
##  $ gamm_amph_foil        : num  0.495 NA NA NA NA ...
##  $ gamm_amph_foil_dw     : num  0.497 NA NA NA NA ...
##  $ caprel_count          : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ caprel_fw             : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ caprel_foil           : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ caprel_foil_dw        : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ limpet_count          : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ limpet_fw             : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ limpet_foil           : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ limpet_foil_dw        : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ gastropod_count       : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ gastropod_fw          : chr  "" "" "" "" ...
##  $ gastropod_foil        : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ gastropod_foil_dw     : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ crab_count            : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ crab_fw               : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ crab_foil             : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ crab_foil_dw          : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ polychaete_count      : int  6 NA NA NA NA 7 NA NA NA NA ...
##  $ other_animal_notes    : chr  "" "" "" "" ...
##  $ notes                 : chr  "" "" "" "" ...
# Shoot dry weight (g): dried shoot-plus-foil mass minus the foil mass.
# NOTE(review): assumes shoot_foil is the tare weight of the foil alone and
# shoot_foil_dw is foil + dried shoot -- confirm against the lab protocol.
eel <- mutate(eelgrass, shoot_dw = shoot_foil_dw - shoot_foil)
# Determine the longest and the mean leaf length (cm) of every shoot from the
# ten leaf_length columns.
#
#   max_length  - longest measured leaf on the shoot
#   mean_length - mean of all measured leaves on the shoot
#
# Missing leaf positions (NA) are ignored. A shoot with no leaf measurements
# at all gets NA in both columns (not -Inf / NaN), so it is dropped by default
# in models and plots instead of distorting them.
#
# Why not rowwise() + mean(leaf_length1, leaf_length2, ...)?
#   * mean() treats only its FIRST argument as data; the second is matched to
#     `trim`, so that call just returned leaf_length1.
#   * max(..., na.rm = TRUE) on an all-NA row warns and returns -Inf.
#   * rowwise() leaves the result grouped by row, which silently changes how
#     every later dplyr verb behaves.
# pmax() and rowMeans() work across columns for all rows at once and avoid
# all three problems.
leaf_cols <- paste0("leaf_length", seq_len(10))
leaf_lengths <- eel[leaf_cols]  # base indexing: MASS masks dplyr::select()

eel2 <- eel %>%
  mutate(
    # element-wise maximum across the ten columns; NA only if all are NA
    max_length = do.call(pmax, c(unname(as.list(leaf_lengths)), na.rm = TRUE)),
    # rowMeans() returns NaN for an all-NA row; normalise that to NA
    mean_length = rowMeans(leaf_lengths, na.rm = TRUE),
    mean_length = ifelse(is.nan(mean_length), NA_real_, mean_length)
  )
## Warning in max(leaf_length1, leaf_length2, leaf_length3, leaf_length4,
## leaf_length5, : no non-missing arguments to max; returning -Inf

## Warning in max(leaf_length1, leaf_length2, leaf_length3, leaf_length4,
## leaf_length5, : no non-missing arguments to max; returning -Inf

## Warning in max(leaf_length1, leaf_length2, leaf_length3, leaf_length4,
## leaf_length5, : no non-missing arguments to max; returning -Inf

## Warning in max(leaf_length1, leaf_length2, leaf_length3, leaf_length4,
## leaf_length5, : no non-missing arguments to max; returning -Inf

## Warning in max(leaf_length1, leaf_length2, leaf_length3, leaf_length4,
## leaf_length5, : no non-missing arguments to max; returning -Inf

## Warning in max(leaf_length1, leaf_length2, leaf_length3, leaf_length4,
## leaf_length5, : no non-missing arguments to max; returning -Inf

## Warning in max(leaf_length1, leaf_length2, leaf_length3, leaf_length4,
## leaf_length5, : no non-missing arguments to max; returning -Inf
# Check the structure of the augmented table, including the derived columns.
eel2 %>% str()
## rowws_df [840 × 74] (S3: rowwise_df/tbl_df/tbl/data.frame)
##  $ site                  : chr [1:840] "2017_H_01" "2017_H_01" "2017_H_01" "2017_H_01" ...
##  $ collection_date       : chr [1:840] "29/4/17" "29/4/17" "29/4/17" "29/4/17" ...
##  $ YYYYMMDD              : int [1:840] 20170429 20170429 20170429 20170429 20170429 20170429 20170429 20170429 20170429 20170429 ...
##  $ quadrat               : int [1:840] 1 1 1 1 1 2 2 2 2 2 ...
##  $ plant                 : int [1:840] 1 2 3 4 5 1 2 3 4 5 ...
##  $ rhi_length            : num [1:840] 5 5 5 4.6 5 4.6 4 5 5 3.6 ...
##  $ node1                 : num [1:840] 0.5 0.7 0.3 0.4 0.2 1.2 0.3 0.3 0.5 0.4 ...
##  $ node2                 : num [1:840] 0.4 0.4 0.3 0.4 0.2 0.6 0.3 0.2 0.6 0.6 ...
##  $ node3                 : num [1:840] 0.3 0.3 0.4 0.3 0.3 0.4 0.2 0.2 0.4 0.4 ...
##  $ node4                 : num [1:840] 0.2 0.3 0.3 0.4 0.2 0.3 0.2 0.2 0.3 0.2 ...
##  $ node5                 : num [1:840] 0.2 0.3 0.2 0.2 0.3 0.3 0.2 0.3 0.2 0.2 ...
##  $ leaf_length1          : num [1:840] 22.1 25.2 16.4 21 13 32.4 23 16.5 26 26.5 ...
##  $ leaf_length2          : num [1:840] 18.9 17.6 9.8 18 13.6 22.1 12.3 21.1 21.1 18.1 ...
##  $ leaf_length3          : num [1:840] 14.5 22 14.1 8.5 9.6 25 16.1 22.7 20.2 25.7 ...
##  $ leaf_length4          : num [1:840] 6.4 14.9 9.4 17 NA 27.3 9.3 8.7 10.2 11.3 ...
##  $ leaf_length5          : num [1:840] 13.3 8 NA 16.5 NA 12.4 15.5 15.5 17.5 22.7 ...
##  $ leaf_length6          : num [1:840] NA 3.6 NA NA NA 9.2 NA NA NA NA ...
##  $ leaf_length7          : num [1:840] NA NA NA NA NA 6 NA NA NA NA ...
##  $ leaf_length8          : num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ leaf_length9          : num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ leaf_length10         : num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ leaf_width1           : num [1:840] 0.3 0.3 0.2 0.3 0.2 0.4 0.2 0.2 0.3 0.4 ...
##  $ leaf_width2           : num [1:840] 0.2 0.3 0.2 0.2 0.2 0.4 0.2 0.2 0.2 0.3 ...
##  $ leaf_width3           : num [1:840] 0.2 0.3 0.2 0.2 0.2 0.4 0.2 0.2 0.3 0.3 ...
##  $ leaf_width4           : num [1:840] 0.2 0.3 0.2 0.2 NA 0.4 0.2 0.2 0.3 0.3 ...
##  $ leaf_width5           : num [1:840] 0.2 0.3 NA 0.2 NA 0.2 0.3 0.3 0.2 0.4 ...
##  $ leaf_width6           : num [1:840] NA 0.2 NA NA NA 0.2 NA NA NA NA ...
##  $ leaf_width7           : num [1:840] NA NA NA NA NA 0.3 NA NA NA NA ...
##  $ leaf_width8           : num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ leaf_width9           : num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ leaf_width10          : num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ pad_mass_g            : num [1:840] 0.543 0.529 0.536 0.491 0.592 ...
##  $ pad_epiphyte_mass_g   : num [1:840] 0.549 0.53 0.543 0.494 0.596 ...
##  $ rhi_mass_fw           : num [1:840] 0.227 0.488 0.346 0.397 0.455 ...
##  $ rhi_foil              : num [1:840] 0.658 0.427 0.45 0.376 0.55 ...
##  $ rhi_foil_dw_g         : num [1:840] 0.684 0.478 0.49 0.411 0.586 ...
##  $ shoot_mass_fw         : num [1:840] 0.269 0.412 0.165 0.321 0.116 ...
##  $ shoot_foil            : num [1:840] 0.905 0.728 0.758 0.611 0.708 ...
##  $ shoot_foil_dw         : num [1:840] 0.944 0.791 0.783 0.66 0.724 ...
##  $ xs_shoot_mass_fw      : num [1:840] 4.77 NA NA NA NA ...
##  $ xs_shoot_foil         : num [1:840] 0.789 NA NA NA NA ...
##  $ xs_shoot_foil_dw      : num [1:840] 1.38 NA NA NA NA ...
##  $ xs_pad_mass_g         : num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ xs_epiphyte_pad_mass_g: num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ iso_count             : int [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ iso_fw                : num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ iso_foil              : num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ iso_foil_dw           : num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ gamm_amph_count       : int [1:840] 3 NA NA NA NA 21 NA NA NA NA ...
##  $ gamm_amph_fw          : num [1:840] 0.0059 NA NA NA NA 0.036 NA NA NA NA ...
##  $ gamm_amph_foil        : num [1:840] 0.495 NA NA NA NA ...
##  $ gamm_amph_foil_dw     : num [1:840] 0.497 NA NA NA NA ...
##  $ caprel_count          : int [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ caprel_fw             : num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ caprel_foil           : num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ caprel_foil_dw        : num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ limpet_count          : int [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ limpet_fw             : num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ limpet_foil           : num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ limpet_foil_dw        : num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ gastropod_count       : int [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ gastropod_fw          : chr [1:840] "" "" "" "" ...
##  $ gastropod_foil        : num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ gastropod_foil_dw     : num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ crab_count            : int [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ crab_fw               : num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ crab_foil             : num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ crab_foil_dw          : num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ polychaete_count      : int [1:840] 6 NA NA NA NA 7 NA NA NA NA ...
##  $ other_animal_notes    : chr [1:840] "" "" "" "" ...
##  $ notes                 : chr [1:840] "" "" "" "" ...
##  $ shoot_dw              : num [1:840] 0.039 0.0629 0.0247 0.0489 0.0167 ...
##  $ max_length            : num [1:840] 22.1 25.2 16.4 21 13.6 32.4 23 22.7 26 26.5 ...
##  $ mean_length           : num [1:840] 22.1 25.2 16.4 21 13 32.4 23 16.5 26 26.5 ...
##  - attr(*, "groups")= tibble [840 × 1] (S3: tbl_df/tbl/data.frame)
##   ..$ .rows: list<int> [1:840] 
##   .. ..$ : int 1
##   .. ..$ : int 2
##   .. ..$ : int 3
##   .. ..$ : int 4
##   .. ..$ : int 5
##   .. ..$ : int 6
##   .. ..$ : int 7
##   .. ..$ : int 8
##   .. ..$ : int 9
##   .. ..$ : int 10
##   .. ..$ : int 11
##   .. ..$ : int 12
##   .. ..$ : int 13
##   .. ..$ : int 14
##   .. ..$ : int 15
##   .. ..$ : int 16
##   .. ..$ : int 17
##   .. ..$ : int 18
##   .. ..$ : int 19
##   .. ..$ : int 20
##   .. ..$ : int 21
##   .. ..$ : int 22
##   .. ..$ : int 23
##   .. ..$ : int 24
##   .. ..$ : int 25
##   .. ..$ : int 26
##   .. ..$ : int 27
##   .. ..$ : int 28
##   .. ..$ : int 29
##   .. ..$ : int 30
##   .. ..$ : int 31
##   .. ..$ : int 32
##   .. ..$ : int 33
##   .. ..$ : int 34
##   .. ..$ : int 35
##   .. ..$ : int 36
##   .. ..$ : int 37
##   .. ..$ : int 38
##   .. ..$ : int 39
##   .. ..$ : int 40
##   .. ..$ : int 41
##   .. ..$ : int 42
##   .. ..$ : int 43
##   .. ..$ : int 44
##   .. ..$ : int 45
##   .. ..$ : int 46
##   .. ..$ : int 47
##   .. ..$ : int 48
##   .. ..$ : int 49
##   .. ..$ : int 50
##   .. ..$ : int 51
##   .. ..$ : int 52
##   .. ..$ : int 53
##   .. ..$ : int 54
##   .. ..$ : int 55
##   .. ..$ : int 56
##   .. ..$ : int 57
##   .. ..$ : int 58
##   .. ..$ : int 59
##   .. ..$ : int 60
##   .. ..$ : int 61
##   .. ..$ : int 62
##   .. ..$ : int 63
##   .. ..$ : int 64
##   .. ..$ : int 65
##   .. ..$ : int 66
##   .. ..$ : int 67
##   .. ..$ : int 68
##   .. ..$ : int 69
##   .. ..$ : int 70
##   .. ..$ : int 71
##   .. ..$ : int 72
##   .. ..$ : int 73
##   .. ..$ : int 74
##   .. ..$ : int 75
##   .. ..$ : int 76
##   .. ..$ : int 77
##   .. ..$ : int 78
##   .. ..$ : int 79
##   .. ..$ : int 80
##   .. ..$ : int 81
##   .. ..$ : int 82
##   .. ..$ : int 83
##   .. ..$ : int 84
##   .. ..$ : int 85
##   .. ..$ : int 86
##   .. ..$ : int 87
##   .. ..$ : int 88
##   .. ..$ : int 89
##   .. ..$ : int 90
##   .. ..$ : int 91
##   .. ..$ : int 92
##   .. ..$ : int 93
##   .. ..$ : int 94
##   .. ..$ : int 95
##   .. ..$ : int 96
##   .. ..$ : int 97
##   .. ..$ : int 98
##   .. ..$ : int 99
##   .. .. [list output truncated]
##   .. ..@ ptype: int(0)
# Keep only the shoots collected before 1 July 2017, i.e. during the main
# part of the growing season. YYYYMMDD is an integer date, so a plain numeric
# comparison is enough.
eel_beforeJuly <- dplyr::filter(eel2, YYYYMMDD < 20170701)

# Analyse the complete data set (no date restriction), then inspect it.
eel_sub <- eel2
eel_sub %>% str()
## rowws_df [840 × 74] (S3: rowwise_df/tbl_df/tbl/data.frame)
##  $ site                  : chr [1:840] "2017_H_01" "2017_H_01" "2017_H_01" "2017_H_01" ...
##  $ collection_date       : chr [1:840] "29/4/17" "29/4/17" "29/4/17" "29/4/17" ...
##  $ YYYYMMDD              : int [1:840] 20170429 20170429 20170429 20170429 20170429 20170429 20170429 20170429 20170429 20170429 ...
##  $ quadrat               : int [1:840] 1 1 1 1 1 2 2 2 2 2 ...
##  $ plant                 : int [1:840] 1 2 3 4 5 1 2 3 4 5 ...
##  $ rhi_length            : num [1:840] 5 5 5 4.6 5 4.6 4 5 5 3.6 ...
##  $ node1                 : num [1:840] 0.5 0.7 0.3 0.4 0.2 1.2 0.3 0.3 0.5 0.4 ...
##  $ node2                 : num [1:840] 0.4 0.4 0.3 0.4 0.2 0.6 0.3 0.2 0.6 0.6 ...
##  $ node3                 : num [1:840] 0.3 0.3 0.4 0.3 0.3 0.4 0.2 0.2 0.4 0.4 ...
##  $ node4                 : num [1:840] 0.2 0.3 0.3 0.4 0.2 0.3 0.2 0.2 0.3 0.2 ...
##  $ node5                 : num [1:840] 0.2 0.3 0.2 0.2 0.3 0.3 0.2 0.3 0.2 0.2 ...
##  $ leaf_length1          : num [1:840] 22.1 25.2 16.4 21 13 32.4 23 16.5 26 26.5 ...
##  $ leaf_length2          : num [1:840] 18.9 17.6 9.8 18 13.6 22.1 12.3 21.1 21.1 18.1 ...
##  $ leaf_length3          : num [1:840] 14.5 22 14.1 8.5 9.6 25 16.1 22.7 20.2 25.7 ...
##  $ leaf_length4          : num [1:840] 6.4 14.9 9.4 17 NA 27.3 9.3 8.7 10.2 11.3 ...
##  $ leaf_length5          : num [1:840] 13.3 8 NA 16.5 NA 12.4 15.5 15.5 17.5 22.7 ...
##  $ leaf_length6          : num [1:840] NA 3.6 NA NA NA 9.2 NA NA NA NA ...
##  $ leaf_length7          : num [1:840] NA NA NA NA NA 6 NA NA NA NA ...
##  $ leaf_length8          : num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ leaf_length9          : num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ leaf_length10         : num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ leaf_width1           : num [1:840] 0.3 0.3 0.2 0.3 0.2 0.4 0.2 0.2 0.3 0.4 ...
##  $ leaf_width2           : num [1:840] 0.2 0.3 0.2 0.2 0.2 0.4 0.2 0.2 0.2 0.3 ...
##  $ leaf_width3           : num [1:840] 0.2 0.3 0.2 0.2 0.2 0.4 0.2 0.2 0.3 0.3 ...
##  $ leaf_width4           : num [1:840] 0.2 0.3 0.2 0.2 NA 0.4 0.2 0.2 0.3 0.3 ...
##  $ leaf_width5           : num [1:840] 0.2 0.3 NA 0.2 NA 0.2 0.3 0.3 0.2 0.4 ...
##  $ leaf_width6           : num [1:840] NA 0.2 NA NA NA 0.2 NA NA NA NA ...
##  $ leaf_width7           : num [1:840] NA NA NA NA NA 0.3 NA NA NA NA ...
##  $ leaf_width8           : num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ leaf_width9           : num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ leaf_width10          : num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ pad_mass_g            : num [1:840] 0.543 0.529 0.536 0.491 0.592 ...
##  $ pad_epiphyte_mass_g   : num [1:840] 0.549 0.53 0.543 0.494 0.596 ...
##  $ rhi_mass_fw           : num [1:840] 0.227 0.488 0.346 0.397 0.455 ...
##  $ rhi_foil              : num [1:840] 0.658 0.427 0.45 0.376 0.55 ...
##  $ rhi_foil_dw_g         : num [1:840] 0.684 0.478 0.49 0.411 0.586 ...
##  $ shoot_mass_fw         : num [1:840] 0.269 0.412 0.165 0.321 0.116 ...
##  $ shoot_foil            : num [1:840] 0.905 0.728 0.758 0.611 0.708 ...
##  $ shoot_foil_dw         : num [1:840] 0.944 0.791 0.783 0.66 0.724 ...
##  $ xs_shoot_mass_fw      : num [1:840] 4.77 NA NA NA NA ...
##  $ xs_shoot_foil         : num [1:840] 0.789 NA NA NA NA ...
##  $ xs_shoot_foil_dw      : num [1:840] 1.38 NA NA NA NA ...
##  $ xs_pad_mass_g         : num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ xs_epiphyte_pad_mass_g: num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ iso_count             : int [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ iso_fw                : num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ iso_foil              : num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ iso_foil_dw           : num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ gamm_amph_count       : int [1:840] 3 NA NA NA NA 21 NA NA NA NA ...
##  $ gamm_amph_fw          : num [1:840] 0.0059 NA NA NA NA 0.036 NA NA NA NA ...
##  $ gamm_amph_foil        : num [1:840] 0.495 NA NA NA NA ...
##  $ gamm_amph_foil_dw     : num [1:840] 0.497 NA NA NA NA ...
##  $ caprel_count          : int [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ caprel_fw             : num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ caprel_foil           : num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ caprel_foil_dw        : num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ limpet_count          : int [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ limpet_fw             : num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ limpet_foil           : num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ limpet_foil_dw        : num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ gastropod_count       : int [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ gastropod_fw          : chr [1:840] "" "" "" "" ...
##  $ gastropod_foil        : num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ gastropod_foil_dw     : num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ crab_count            : int [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ crab_fw               : num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ crab_foil             : num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ crab_foil_dw          : num [1:840] NA NA NA NA NA NA NA NA NA NA ...
##  $ polychaete_count      : int [1:840] 6 NA NA NA NA 7 NA NA NA NA ...
##  $ other_animal_notes    : chr [1:840] "" "" "" "" ...
##  $ notes                 : chr [1:840] "" "" "" "" ...
##  $ shoot_dw              : num [1:840] 0.039 0.0629 0.0247 0.0489 0.0167 ...
##  $ max_length            : num [1:840] 22.1 25.2 16.4 21 13.6 32.4 23 22.7 26 26.5 ...
##  $ mean_length           : num [1:840] 22.1 25.2 16.4 21 13 32.4 23 16.5 26 26.5 ...
##  - attr(*, "groups")= tibble [840 × 1] (S3: tbl_df/tbl/data.frame)
##   ..$ .rows: list<int> [1:840] 
##   .. ..$ : int 1
##   .. ..$ : int 2
##   .. ..$ : int 3
##   .. ..$ : int 4
##   .. ..$ : int 5
##   .. ..$ : int 6
##   .. ..$ : int 7
##   .. ..$ : int 8
##   .. ..$ : int 9
##   .. ..$ : int 10
##   .. ..$ : int 11
##   .. ..$ : int 12
##   .. ..$ : int 13
##   .. ..$ : int 14
##   .. ..$ : int 15
##   .. ..$ : int 16
##   .. ..$ : int 17
##   .. ..$ : int 18
##   .. ..$ : int 19
##   .. ..$ : int 20
##   .. ..$ : int 21
##   .. ..$ : int 22
##   .. ..$ : int 23
##   .. ..$ : int 24
##   .. ..$ : int 25
##   .. ..$ : int 26
##   .. ..$ : int 27
##   .. ..$ : int 28
##   .. ..$ : int 29
##   .. ..$ : int 30
##   .. ..$ : int 31
##   .. ..$ : int 32
##   .. ..$ : int 33
##   .. ..$ : int 34
##   .. ..$ : int 35
##   .. ..$ : int 36
##   .. ..$ : int 37
##   .. ..$ : int 38
##   .. ..$ : int 39
##   .. ..$ : int 40
##   .. ..$ : int 41
##   .. ..$ : int 42
##   .. ..$ : int 43
##   .. ..$ : int 44
##   .. ..$ : int 45
##   .. ..$ : int 46
##   .. ..$ : int 47
##   .. ..$ : int 48
##   .. ..$ : int 49
##   .. ..$ : int 50
##   .. ..$ : int 51
##   .. ..$ : int 52
##   .. ..$ : int 53
##   .. ..$ : int 54
##   .. ..$ : int 55
##   .. ..$ : int 56
##   .. ..$ : int 57
##   .. ..$ : int 58
##   .. ..$ : int 59
##   .. ..$ : int 60
##   .. ..$ : int 61
##   .. ..$ : int 62
##   .. ..$ : int 63
##   .. ..$ : int 64
##   .. ..$ : int 65
##   .. ..$ : int 66
##   .. ..$ : int 67
##   .. ..$ : int 68
##   .. ..$ : int 69
##   .. ..$ : int 70
##   .. ..$ : int 71
##   .. ..$ : int 72
##   .. ..$ : int 73
##   .. ..$ : int 74
##   .. ..$ : int 75
##   .. ..$ : int 76
##   .. ..$ : int 77
##   .. ..$ : int 78
##   .. ..$ : int 79
##   .. ..$ : int 80
##   .. ..$ : int 81
##   .. ..$ : int 82
##   .. ..$ : int 83
##   .. ..$ : int 84
##   .. ..$ : int 85
##   .. ..$ : int 86
##   .. ..$ : int 87
##   .. ..$ : int 88
##   .. ..$ : int 89
##   .. ..$ : int 90
##   .. ..$ : int 91
##   .. ..$ : int 92
##   .. ..$ : int 93
##   .. ..$ : int 94
##   .. ..$ : int 95
##   .. ..$ : int 96
##   .. ..$ : int 97
##   .. ..$ : int 98
##   .. ..$ : int 99
##   .. .. [list output truncated]
##   .. ..@ ptype: int(0)
# List the distinct sampling dates, in ascending order, as character strings.
as.character(sort(unique(eel_sub$YYYYMMDD)))
##  [1] "20170429" "20170524" "20170525" "20170528" "20170622" "20170623"
##  [7] "20170624" "20170625" "20170627" "20170628" "20170722" "20170723"
## [13] "20170724" "20170725" "20170726" "20170820" "20170821" "20170822"
# Per-column summaries (range, quartiles, NA counts) of the analysis data set.
eel_sub %>% summary()
##      site           collection_date       YYYYMMDD           quadrat    
##  Length:840         Length:840         Min.   :20170429   Min.   :1.00  
##  Class :character   Class :character   1st Qu.:20170525   1st Qu.:2.75  
##  Mode  :character   Mode  :character   Median :20170625   Median :4.50  
##                                        Mean   :20170630   Mean   :4.50  
##                                        3rd Qu.:20170724   3rd Qu.:6.25  
##                                        Max.   :20170822   Max.   :8.00  
##                                                                         
##      plant     rhi_length        node1           node2           node3       
##  Min.   :1   Min.   :0.800   Min.   :0.100   Min.   :0.100   Min.   :0.1000  
##  1st Qu.:2   1st Qu.:5.000   1st Qu.:0.600   1st Qu.:0.700   1st Qu.:0.5000  
##  Median :3   Median :5.000   Median :1.000   Median :1.100   Median :0.9000  
##  Mean   :3   Mean   :4.608   Mean   :1.079   Mean   :1.246   Mean   :0.9314  
##  3rd Qu.:4   3rd Qu.:5.000   3rd Qu.:1.400   3rd Qu.:1.700   3rd Qu.:1.3000  
##  Max.   :5   Max.   :5.000   Max.   :3.100   Max.   :3.600   Max.   :2.7000  
##              NA's   :7       NA's   :7       NA's   :25      NA's   :183     
##      node4           node5         leaf_length1     leaf_length2   
##  Min.   :0.100   Min.   :0.1000   Min.   :  2.30   Min.   :  1.70  
##  1st Qu.:0.400   1st Qu.:0.3000   1st Qu.: 22.70   1st Qu.: 22.20  
##  Median :0.600   Median :0.4000   Median : 35.00   Median : 36.70  
##  Mean   :0.684   Mean   :0.4946   Mean   : 39.04   Mean   : 40.05  
##  3rd Qu.:0.900   3rd Qu.:0.7000   3rd Qu.: 50.80   3rd Qu.: 53.20  
##  Max.   :8.000   Max.   :1.5000   Max.   :107.00   Max.   :133.20  
##  NA's   :359     NA's   :508      NA's   :7        NA's   :7       
##   leaf_length3     leaf_length4     leaf_length5     leaf_length6   
##  Min.   :  3.10   Min.   :  2.10   Min.   :  1.90   Min.   :  2.10  
##  1st Qu.: 19.60   1st Qu.: 18.60   1st Qu.: 17.05   1st Qu.: 13.80  
##  Median : 32.80   Median : 33.20   Median : 31.20   Median : 29.00  
##  Mean   : 37.33   Mean   : 36.65   Mean   : 35.01   Mean   : 33.01  
##  3rd Qu.: 50.20   3rd Qu.: 49.30   3rd Qu.: 46.30   3rd Qu.: 45.65  
##  Max.   :125.80   Max.   :126.00   Max.   :124.20   Max.   :113.00  
##  NA's   :8        NA's   :31       NA's   :249      NA's   :585     
##   leaf_length7    leaf_length8     leaf_length9   leaf_length10  
##  Min.   : 1.30   Min.   : 2.600   Min.   : 3.00   Min.   :16.00  
##  1st Qu.:13.00   1st Qu.: 7.825   1st Qu.: 8.25   1st Qu.:18.18  
##  Median :27.50   Median :21.850   Median :14.05   Median :20.35  
##  Mean   :29.43   Mean   :29.128   Mean   :19.22   Mean   :20.35  
##  3rd Qu.:42.65   3rd Qu.:38.450   3rd Qu.:28.02   3rd Qu.:22.52  
##  Max.   :82.60   Max.   :97.200   Max.   :45.20   Max.   :24.70  
##  NA's   :744     NA's   :808      NA's   :834     NA's   :838    
##   leaf_width1     leaf_width2      leaf_width3    leaf_width4    
##  Min.   :0.100   Min.   :0.1000   Min.   :0.10   Min.   :0.1000  
##  1st Qu.:0.300   1st Qu.:0.3000   1st Qu.:0.30   1st Qu.:0.4000  
##  Median :0.400   Median :0.4000   Median :0.40   Median :0.4000  
##  Mean   :0.408   Mean   :0.4139   Mean   :0.42   Mean   :0.4223  
##  3rd Qu.:0.500   3rd Qu.:0.5000   3rd Qu.:0.50   3rd Qu.:0.5000  
##  Max.   :0.800   Max.   :0.8000   Max.   :0.80   Max.   :0.8000  
##  NA's   :7       NA's   :7        NA's   :8      NA's   :31      
##   leaf_width5      leaf_width6      leaf_width7      leaf_width8 
##  Min.   :0.1000   Min.   :0.1000   Min.   :0.2000   Min.   :0.2  
##  1st Qu.:0.3000   1st Qu.:0.3000   1st Qu.:0.3000   1st Qu.:0.3  
##  Median :0.4000   Median :0.4000   Median :0.4000   Median :0.4  
##  Mean   :0.4206   Mean   :0.4051   Mean   :0.3979   Mean   :0.4  
##  3rd Qu.:0.5000   3rd Qu.:0.5000   3rd Qu.:0.5000   3rd Qu.:0.5  
##  Max.   :0.8000   Max.   :0.7000   Max.   :0.7000   Max.   :0.6  
##  NA's   :249      NA's   :585      NA's   :744      NA's   :808  
##   leaf_width9      leaf_width10     pad_mass_g     pad_epiphyte_mass_g
##  Min.   :0.2000   Min.   :0.300   Min.   :0.3216   Min.   :0.3247     
##  1st Qu.:0.3000   1st Qu.:0.325   1st Qu.:0.6301   1st Qu.:0.6357     
##  Median :0.3000   Median :0.350   Median :0.6620   Median :0.6694     
##  Mean   :0.3167   Mean   :0.350   Mean   :0.6585   Mean   :0.6658     
##  3rd Qu.:0.3750   3rd Qu.:0.375   3rd Qu.:0.6954   3rd Qu.:0.7025     
##  Max.   :0.4000   Max.   :0.400   Max.   :0.8519   Max.   :0.9698     
##  NA's   :834      NA's   :838     NA's   :17       NA's   :17         
##   rhi_mass_fw        rhi_foil      rhi_foil_dw_g    shoot_mass_fw   
##  Min.   :0.0353   Min.   :0.2876   Min.   :0.3297   Min.   :0.0293  
##  1st Qu.:0.4226   1st Qu.:0.5985   1st Qu.:0.6867   1st Qu.:0.7381  
##  Median :0.5814   Median :0.7381   Median :0.8214   Median :1.3285  
##  Mean   :0.6008   Mean   :0.7875   Mean   :0.8706   Mean   :1.6456  
##  3rd Qu.:0.7535   3rd Qu.:0.9577   3rd Qu.:1.0481   3rd Qu.:2.1778  
##  Max.   :1.9121   Max.   :1.4849   Max.   :1.6390   Max.   :6.6599  
##  NA's   :7        NA's   :7        NA's   :7        NA's   :7       
##    shoot_foil     shoot_foil_dw    xs_shoot_mass_fw  xs_shoot_foil   
##  Min.   :0.4336   Min.   :0.4864   Min.   : 0.0243   Min.   :0.5372  
##  1st Qu.:1.2024   1st Qu.:1.3980   1st Qu.: 3.2862   1st Qu.:1.3737  
##  Median :1.6170   Median :1.9205   Median : 6.9612   Median :2.0462  
##  Mean   :1.6078   Mean   :1.8629   Mean   :10.1766   Mean   :2.1232  
##  3rd Qu.:2.0314   3rd Qu.:2.3504   3rd Qu.:12.7799   3rd Qu.:2.6255  
##  Max.   :3.1062   Max.   :3.3807   Max.   :66.8455   Max.   :5.3852  
##  NA's   :7        NA's   :7        NA's   :672       NA's   :672     
##  xs_shoot_foil_dw  xs_pad_mass_g    xs_epiphyte_pad_mass_g   iso_count     
##  Min.   : 0.6509   Min.   :0.6426   Min.   :0.6730         Min.   : 1.000  
##  1st Qu.: 2.2486   1st Qu.:0.6596   1st Qu.:0.6908         1st Qu.: 1.000  
##  Median : 2.9657   Median :0.6766   Median :0.7087         Median : 1.000  
##  Mean   : 3.3922   Mean   :0.6766   Mean   :0.7087         Mean   : 2.405  
##  3rd Qu.: 3.9205   3rd Qu.:0.6937   3rd Qu.:0.7266         3rd Qu.: 2.000  
##  Max.   :12.5251   Max.   :0.7107   Max.   :0.7444         Max.   :20.000  
##  NA's   :672       NA's   :838      NA's   :838            NA's   :761     
##      iso_fw          iso_foil       iso_foil_dw     gamm_amph_count 
##  Min.   :0.0002   Min.   :0.2344   Min.   :0.2346   Min.   : 1.000  
##  1st Qu.:0.0031   1st Qu.:0.4008   1st Qu.:0.4327   1st Qu.: 1.000  
##  Median :0.0181   Median :0.4801   Median :0.5111   Median : 3.000  
##  Mean   :0.1226   Mean   :0.4996   Mean   :0.5290   Mean   : 4.649  
##  3rd Qu.:0.1878   3rd Qu.:0.5650   3rd Qu.:0.5772   3rd Qu.: 6.000  
##  Max.   :1.2169   Max.   :0.9067   Max.   :0.9509   Max.   :33.000  
##  NA's   :761      NA's   :761      NA's   :761      NA's   :746     
##   gamm_amph_fw    gamm_amph_foil   gamm_amph_foil_dw  caprel_count   
##  Min.   :0.0004   Min.   :0.2516   Min.   :0.2516    Min.   : 1.000  
##  1st Qu.:0.0031   1st Qu.:0.3984   1st Qu.:0.3988    1st Qu.: 1.000  
##  Median :0.0067   Median :0.4884   Median :0.4894    Median : 3.000  
##  Mean   :0.0118   Mean   :0.4941   Mean   :0.4950    Mean   : 5.695  
##  3rd Qu.:0.0123   3rd Qu.:0.5392   3rd Qu.:0.5395    3rd Qu.: 7.750  
##  Max.   :0.1460   Max.   :1.0428   Max.   :1.0438    Max.   :28.000  
##  NA's   :747      NA's   :747      NA's   :747       NA's   :758     
##    caprel_fw       caprel_foil     caprel_foil_dw    limpet_count   
##  Min.   :0.0003   Min.   :0.0001   Min.   :0.0002   Min.   : 1.000  
##  1st Qu.:0.0026   1st Qu.:0.3694   1st Qu.:0.3701   1st Qu.: 1.000  
##  Median :0.0082   Median :0.4709   Median :0.4713   Median : 3.000  
##  Mean   :0.0320   Mean   :0.4678   Mean   :0.4693   Mean   : 4.216  
##  3rd Qu.:0.0275   3rd Qu.:0.5586   3rd Qu.:0.5591   3rd Qu.: 5.000  
##  Max.   :0.6490   Max.   :1.0819   Max.   :1.0860   Max.   :17.000  
##  NA's   :763      NA's   :759      NA's   :759      NA's   :738     
##    limpet_fw       limpet_foil     limpet_foil_dw   gastropod_count 
##  Min.   :0.0037   Min.   :0.2127   Min.   :0.2152   Min.   : 1.000  
##  1st Qu.:0.0280   1st Qu.:0.4113   1st Qu.:0.4436   1st Qu.: 2.000  
##  Median :0.0634   Median :0.4988   Median :0.5363   Median : 3.000  
##  Mean   :0.1240   Mean   :0.5046   Mean   :0.5434   Mean   : 8.767  
##  3rd Qu.:0.1509   3rd Qu.:0.5760   3rd Qu.:0.6236   3rd Qu.: 9.750  
##  Max.   :0.8388   Max.   :1.0519   Max.   :1.0611   Max.   :70.000  
##  NA's   :738      NA's   :738      NA's   :738      NA's   :754     
##  gastropod_fw       gastropod_foil   gastropod_foil_dw   crab_count 
##  Length:840         Min.   :0.0001   Min.   :0.0002    Min.   :1    
##  Class :character   1st Qu.:0.3753   1st Qu.:0.4020    1st Qu.:1    
##  Mode  :character   Median :0.4532   Median :0.4832    Median :1    
##                     Mean   :0.4802   Mean   :0.5010    Mean   :1    
##                     3rd Qu.:0.5550   3rd Qu.:0.5772    3rd Qu.:1    
##                     Max.   :1.0419   Max.   :1.0514    Max.   :1    
##                     NA's   :753      NA's   :753       NA's   :839  
##     crab_fw         crab_foil       crab_foil_dw    polychaete_count
##  Min.   :0.1673   Min.   :0.3328   Min.   :0.3593   Min.   : 0.000  
##  1st Qu.:0.1673   1st Qu.:0.3328   1st Qu.:0.3593   1st Qu.: 2.250  
##  Median :0.1673   Median :0.3328   Median :0.3593   Median : 4.000  
##  Mean   :0.1673   Mean   :0.3328   Mean   :0.3593   Mean   : 5.267  
##  3rd Qu.:0.1673   3rd Qu.:0.3328   3rd Qu.:0.3593   3rd Qu.: 6.000  
##  Max.   :0.1673   Max.   :0.3328   Max.   :0.3593   Max.   :35.000  
##  NA's   :839      NA's   :839      NA's   :839      NA's   :690     
##  other_animal_notes    notes              shoot_dw        max_length    
##  Length:840         Length:840         Min.   :0.0042   Min.   :  -Inf  
##  Class :character   Class :character   1st Qu.:0.1261   1st Qu.: 35.67  
##  Mode  :character   Mode  :character   Median :0.2074   Median : 49.20  
##                                        Mean   :0.2551   Mean   :  -Inf  
##                                        3rd Qu.:0.3250   3rd Qu.: 66.83  
##                                        Max.   :1.3277   Max.   :133.20  
##                                        NA's   :7                        
##   mean_length    
##  Min.   :  2.30  
##  1st Qu.: 22.70  
##  Median : 35.00  
##  Mean   : 39.04  
##  3rd Qu.: 50.80  
##  Max.   :107.00  
##  NA's   :7
# Modelling data set: the two biomass columns and the two length columns of
# eel_sub, keeping only shoots for which all four values are present.
eel_sub2 <- na.omit(
  data.frame(
    shoot_dw = eel_sub$shoot_dw,
    max_length = eel_sub$max_length,
    mean_length = eel_sub$mean_length,
    shoot_mass_fw = eel_sub$shoot_mass_fw
  )
)

summary(eel_beforeJuly)
##      site           collection_date       YYYYMMDD           quadrat    
##  Length:520         Length:520         Min.   :20170429   Min.   :1.00  
##  Class :character   Class :character   1st Qu.:20170524   1st Qu.:2.75  
##  Mode  :character   Mode  :character   Median :20170528   Median :4.50  
##                                        Mean   :20170549   Mean   :4.50  
##                                        3rd Qu.:20170624   3rd Qu.:6.25  
##                                        Max.   :20170628   Max.   :8.00  
##                                                                         
##      plant     rhi_length        node1           node2           node3       
##  Min.   :1   Min.   :0.800   Min.   :0.100   Min.   :0.100   Min.   :0.1000  
##  1st Qu.:2   1st Qu.:4.300   1st Qu.:0.600   1st Qu.:0.600   1st Qu.:0.4000  
##  Median :3   Median :5.000   Median :0.900   Median :1.100   Median :0.7000  
##  Mean   :3   Mean   :4.439   Mean   :1.018   Mean   :1.196   Mean   :0.8411  
##  3rd Qu.:4   3rd Qu.:5.000   3rd Qu.:1.300   3rd Qu.:1.600   3rd Qu.:1.2000  
##  Max.   :5   Max.   :5.000   Max.   :3.000   Max.   :3.600   Max.   :2.7000  
##              NA's   :7       NA's   :7       NA's   :19      NA's   :116     
##      node4            node5         leaf_length1     leaf_length2   
##  Min.   :0.1000   Min.   :0.1000   Min.   :  2.30   Min.   :  1.70  
##  1st Qu.:0.3000   1st Qu.:0.2000   1st Qu.: 19.00   1st Qu.: 18.30  
##  Median :0.5000   Median :0.3000   Median : 28.40   Median : 30.30  
##  Mean   :0.5618   Mean   :0.3634   Mean   : 33.15   Mean   : 33.83  
##  3rd Qu.:0.7000   3rd Qu.:0.5000   3rd Qu.: 42.40   3rd Qu.: 46.20  
##  Max.   :8.0000   Max.   :1.5000   Max.   :105.20   Max.   :110.50  
##  NA's   :206      NA's   :293      NA's   :7        NA's   :7       
##   leaf_length3     leaf_length4    leaf_length5     leaf_length6  
##  Min.   :  3.10   Min.   : 2.10   Min.   :  1.90   Min.   : 2.10  
##  1st Qu.: 14.45   1st Qu.:15.55   1st Qu.: 15.65   1st Qu.:12.53  
##  Median : 26.00   Median :28.00   Median : 26.40   Median :27.10  
##  Mean   : 31.11   Mean   :31.02   Mean   : 29.54   Mean   :30.06  
##  3rd Qu.: 43.10   3rd Qu.:41.90   3rd Qu.: 39.25   3rd Qu.:40.35  
##  Max.   :109.90   Max.   :96.00   Max.   :111.70   Max.   :94.20  
##  NA's   :8        NA's   :22      NA's   :117      NA's   :310    
##   leaf_length7    leaf_length8     leaf_length9   leaf_length10  
##  Min.   : 1.30   Min.   : 2.600   Min.   : 3.00   Min.   :16.00  
##  1st Qu.:14.45   1st Qu.: 7.475   1st Qu.: 8.25   1st Qu.:18.18  
##  Median :25.00   Median :21.000   Median :14.05   Median :20.35  
##  Mean   :28.79   Mean   :27.190   Mean   :19.22   Mean   :20.35  
##  3rd Qu.:41.15   3rd Qu.:36.900   3rd Qu.:28.02   3rd Qu.:22.52  
##  Max.   :82.60   Max.   :97.200   Max.   :45.20   Max.   :24.70  
##  NA's   :433     NA's   :490      NA's   :514     NA's   :518    
##   leaf_width1      leaf_width2     leaf_width3      leaf_width4    
##  Min.   :0.1000   Min.   :0.100   Min.   :0.1000   Min.   :0.1000  
##  1st Qu.:0.3000   1st Qu.:0.300   1st Qu.:0.3000   1st Qu.:0.3000  
##  Median :0.4000   Median :0.400   Median :0.4000   Median :0.4000  
##  Mean   :0.3739   Mean   :0.383   Mean   :0.3918   Mean   :0.3946  
##  3rd Qu.:0.5000   3rd Qu.:0.500   3rd Qu.:0.5000   3rd Qu.:0.5000  
##  Max.   :0.8000   Max.   :0.800   Max.   :0.8000   Max.   :0.8000  
##  NA's   :7        NA's   :7       NA's   :8        NA's   :22      
##   leaf_width5     leaf_width6      leaf_width7      leaf_width8    
##  Min.   :0.100   Min.   :0.1000   Min.   :0.2000   Min.   :0.2000  
##  1st Qu.:0.300   1st Qu.:0.3000   1st Qu.:0.3000   1st Qu.:0.3000  
##  Median :0.400   Median :0.4000   Median :0.4000   Median :0.4000  
##  Mean   :0.394   Mean   :0.3857   Mean   :0.3851   Mean   :0.3867  
##  3rd Qu.:0.500   3rd Qu.:0.5000   3rd Qu.:0.4000   3rd Qu.:0.4000  
##  Max.   :0.800   Max.   :0.7000   Max.   :0.7000   Max.   :0.6000  
##  NA's   :117     NA's   :310      NA's   :433      NA's   :490     
##   leaf_width9      leaf_width10     pad_mass_g     pad_epiphyte_mass_g
##  Min.   :0.2000   Min.   :0.300   Min.   :0.4728   Min.   :0.4806     
##  1st Qu.:0.3000   1st Qu.:0.325   1st Qu.:0.6298   1st Qu.:0.6351     
##  Median :0.3000   Median :0.350   Median :0.6655   Median :0.6735     
##  Mean   :0.3167   Mean   :0.350   Mean   :0.6603   Mean   :0.6667     
##  3rd Qu.:0.3750   3rd Qu.:0.375   3rd Qu.:0.7017   3rd Qu.:0.7066     
##  Max.   :0.4000   Max.   :0.400   Max.   :0.8519   Max.   :0.9698     
##  NA's   :514      NA's   :518     NA's   :17       NA's   :17         
##   rhi_mass_fw        rhi_foil      rhi_foil_dw_g    shoot_mass_fw   
##  Min.   :0.0353   Min.   :0.2876   Min.   :0.3297   Min.   :0.0293  
##  1st Qu.:0.3652   1st Qu.:0.7200   1st Qu.:0.7841   1st Qu.:0.5250  
##  Median :0.5143   Median :0.8724   Median :0.9502   Median :1.0255  
##  Mean   :0.5399   Mean   :0.9020   Mean   :0.9759   Mean   :1.3357  
##  3rd Qu.:0.6996   3rd Qu.:1.1072   3rd Qu.:1.1889   3rd Qu.:1.7271  
##  Max.   :1.4428   Max.   :1.4849   Max.   :1.6390   Max.   :6.6599  
##  NA's   :7        NA's   :7        NA's   :7        NA's   :7       
##    shoot_foil     shoot_foil_dw    xs_shoot_mass_fw  xs_shoot_foil   
##  Min.   :0.4336   Min.   :0.4864   Min.   : 0.0243   Min.   :0.5372  
##  1st Qu.:1.0716   1st Qu.:1.2224   1st Qu.: 2.9386   1st Qu.:1.1943  
##  Median :1.3160   Median :1.5332   Median : 6.2291   Median :1.5204  
##  Mean   :1.3830   Mean   :1.5803   Mean   :10.4281   Mean   :1.8368  
##  3rd Qu.:1.7837   3rd Qu.:2.0987   3rd Qu.:13.7739   3rd Qu.:2.0028  
##  Max.   :2.8118   Max.   :2.9439   Max.   :66.8455   Max.   :5.3852  
##  NA's   :7        NA's   :7        NA's   :416       NA's   :416     
##  xs_shoot_foil_dw  xs_pad_mass_g    xs_epiphyte_pad_mass_g   iso_count    
##  Min.   : 0.6509   Min.   :0.7107   Min.   :0.7444         Min.   :1.000  
##  1st Qu.: 1.8682   1st Qu.:0.7107   1st Qu.:0.7444         1st Qu.:1.000  
##  Median : 2.4118   Median :0.7107   Median :0.7444         Median :1.000  
##  Mean   : 3.1024   Mean   :0.7107   Mean   :0.7444         Mean   :1.487  
##  3rd Qu.: 3.2686   3rd Qu.:0.7107   3rd Qu.:0.7444         3rd Qu.:2.000  
##  Max.   :12.5068   Max.   :0.7107   Max.   :0.7444         Max.   :5.000  
##  NA's   :416       NA's   :519      NA's   :519            NA's   :481    
##      iso_fw          iso_foil       iso_foil_dw     gamm_amph_count 
##  Min.   :0.0002   Min.   :0.3042   Min.   :0.3756   Min.   : 1.000  
##  1st Qu.:0.0050   1st Qu.:0.4584   1st Qu.:0.4948   1st Qu.: 2.000  
##  Median :0.1364   Median :0.5225   Median :0.5599   Median : 3.000  
##  Mean   :0.1862   Mean   :0.5613   Mean   :0.6097   Mean   : 5.678  
##  3rd Qu.:0.2212   3rd Qu.:0.6674   3rd Qu.:0.7326   3rd Qu.: 8.000  
##  Max.   :1.2169   Max.   :0.9067   Max.   :0.9509   Max.   :33.000  
##  NA's   :481      NA's   :481      NA's   :481      NA's   :461     
##   gamm_amph_fw    gamm_amph_foil   gamm_amph_foil_dw  caprel_count   
##  Min.   :0.0004   Min.   :0.2773   Min.   :0.2779    Min.   : 1.000  
##  1st Qu.:0.0046   1st Qu.:0.4560   1st Qu.:0.4568    1st Qu.: 1.000  
##  Median :0.0075   Median :0.5098   Median :0.5100    Median : 2.000  
##  Mean   :0.0147   Mean   :0.5325   Mean   :0.5336    Mean   : 4.979  
##  3rd Qu.:0.0140   3rd Qu.:0.5708   3rd Qu.:0.5727    3rd Qu.: 5.000  
##  Max.   :0.1460   Max.   :1.0428   Max.   :1.0438    Max.   :28.000  
##  NA's   :462      NA's   :462      NA's   :462       NA's   :472     
##    caprel_fw       caprel_foil     caprel_foil_dw    limpet_count   
##  Min.   :0.0003   Min.   :0.0001   Min.   :0.0002   Min.   : 1.000  
##  1st Qu.:0.0026   1st Qu.:0.4412   1st Qu.:0.4434   1st Qu.: 2.000  
##  Median :0.0074   Median :0.5155   Median :0.5183   Median : 3.000  
##  Mean   :0.0424   Mean   :0.5176   Mean   :0.5195   Mean   : 4.181  
##  3rd Qu.:0.0234   3rd Qu.:0.6414   3rd Qu.:0.6454   3rd Qu.: 5.250  
##  Max.   :0.6490   Max.   :1.0819   Max.   :1.0860   Max.   :15.000  
##  NA's   :476      NA's   :473      NA's   :473      NA's   :448     
##    limpet_fw       limpet_foil     limpet_foil_dw   gastropod_count 
##  Min.   :0.0037   Min.   :0.2127   Min.   :0.2152   Min.   : 1.000  
##  1st Qu.:0.0216   1st Qu.:0.4171   1st Qu.:0.4538   1st Qu.: 1.250  
##  Median :0.0464   Median :0.5199   Median :0.5500   Median : 2.000  
##  Mean   :0.1183   Mean   :0.5230   Mean   :0.5587   Mean   : 3.405  
##  3rd Qu.:0.1445   3rd Qu.:0.6012   3rd Qu.:0.6429   3rd Qu.: 4.000  
##  Max.   :0.8388   Max.   :1.0519   Max.   :1.0611   Max.   :16.000  
##  NA's   :448      NA's   :448      NA's   :448      NA's   :478     
##  gastropod_fw       gastropod_foil   gastropod_foil_dw   crab_count 
##  Length:520         Min.   :0.0001   Min.   :0.0002    Min.   : NA  
##  Class :character   1st Qu.:0.3992   1st Qu.:0.4186    1st Qu.: NA  
##  Mode  :character   Median :0.5174   Median :0.5216    Median : NA  
##                     Mean   :0.5256   Mean   :0.5394    Mean   :NaN  
##                     3rd Qu.:0.6185   3rd Qu.:0.6308    3rd Qu.: NA  
##                     Max.   :1.0419   Max.   :1.0514    Max.   : NA  
##                     NA's   :478      NA's   :478       NA's   :520  
##     crab_fw      crab_foil    crab_foil_dw polychaete_count other_animal_notes
##  Min.   : NA   Min.   : NA   Min.   : NA   Min.   : 1.000   Length:520        
##  1st Qu.: NA   1st Qu.: NA   1st Qu.: NA   1st Qu.: 3.000   Class :character  
##  Median : NA   Median : NA   Median : NA   Median : 4.000   Mode  :character  
##  Mean   :NaN   Mean   :NaN   Mean   :NaN   Mean   : 5.267                     
##  3rd Qu.: NA   3rd Qu.: NA   3rd Qu.: NA   3rd Qu.: 6.000                     
##  Max.   : NA   Max.   : NA   Max.   : NA   Max.   :35.000                     
##  NA's   :520   NA's   :520   NA's   :520   NA's   :419                        
##     notes              shoot_dw        max_length      mean_length    
##  Length:520         Min.   :0.0042   Min.   :  -Inf   Min.   :  2.30  
##  Class :character   1st Qu.:0.0938   1st Qu.: 29.27   1st Qu.: 19.00  
##  Mode  :character   Median :0.1576   Median : 43.40   Median : 28.40  
##                     Mean   :0.1973   Mean   :  -Inf   Mean   : 33.15  
##                     3rd Qu.:0.2575   3rd Qu.: 57.23   3rd Qu.: 42.40  
##                     Max.   :0.8045   Max.   :111.70   Max.   :105.20  
##                     NA's   :7                         NA's   :7
# Same four columns as eel_sub2, taken from eel_beforeJuly; rows with a
# missing value in any of the four columns are dropped.
eel_bJuly2 <- na.omit(
  data.frame(
    shoot_dw = eel_beforeJuly$shoot_dw,
    max_length = eel_beforeJuly$max_length,
    mean_length = eel_beforeJuly$mean_length,
    shoot_mass_fw = eel_beforeJuly$shoot_mass_fw
  )
)

Look at data

# Scatter of shoot dry weight against max_length.
plot(eel_sub2$max_length, eel_sub2$shoot_dw)

# Looks like the variance in shoot dw might be increasing with the length (cm) of the longest blade.
# Scatter of shoot dry weight against mean_length.
# NOTE(review): in the summary(eel_beforeJuly) output, mean_length has exactly
# the same quartiles, mean and max as leaf_length1 -- verify that mean_length
# was really computed as a per-shoot mean upstream.
plot(eel_sub2$mean_length, eel_sub2$shoot_dw)

# Scatter of shoot fresh weight against max_length.
plot(eel_sub2$max_length, eel_sub2$shoot_mass_fw)

# not as tight of a correlation
# Correlation (cor() default method) between max_length and shoot dry weight.
cor(eel_sub2$max_length, eel_sub2$shoot_dw, use = "complete.obs")
## [1] 0.8317317
cor(eel_sub2$mean_length, eel_sub2$shoot_dw, use = "complete.obs")
## [1] 0.5924666
cor(eel_sub2$mean_length, eel_sub2$shoot_mass_fw, use = "complete.obs")
## [1] 0.6335515
# stronger positive linear relationship with max length than average length (makes sense)
# Side-by-side boxplots of the predictor (max_length) and response (shoot_dw).
par(mfrow = c(1, 2))
boxplot(eel_sub2$max_length)
boxplot(eel_sub2$shoot_dw)

# Looks like both variables may have some outliers, in particular the shoot dry weight.

# Kernel density estimates of both variables. The y axis of a density plot is
# a density, not a frequency, so it is labelled accordingly.
par(mfrow = c(1, 2))
plot(density(eel_sub2$max_length), ylab = "Density")
plot(density(eel_sub2$shoot_dw, na.rm = TRUE), ylab = "Density")

hist(eel_sub2$max_length)
# max length looks pretty normal with some slight right? skew
hist(eel_sub2$shoot_dw)

# Smallest and largest shoot dry weight in the modelling data.
range(eel_sub2$shoot_dw)
## [1] 0.0042 1.3277
# heavily right skewed does log make it look better?
hist(log(eel_sub2$shoot_dw))
# skews it the other way... 


# lets write out the appropriately cleaned 2017 eelgrass data so we can quickly use it to convert eelgrass length to biomass in the future
#write.csv(eel_sub2, file = "Data/eelgrass_conversions_for_lm_2017")

Linear models

All Data

12/7/20 removed july filter## data only before july

# Two-column working data set for the models: dw (shoot dry weight) and
# max_length, with any incomplete rows removed.
dat <- na.omit(
  data.frame(
    dw = eel_sub2$shoot_dw,
    max_length = eel_sub2$max_length
  )
)

plot(dat$dw, dat$max_length)

# Let's fit an untransformed linear model
fit.lm <- lm(dw ~ max_length, data = dat)
par(mfrow = c(2, 2))
plot(fit.lm, which = 1:4)

# Looks like there might be some funky stuff happening with the residuals --
# maybe an increase in variance along the fitted values. The QQ plot looks
# real gross, with some concavity (i.e. skewness) and some possible outliers.
# The Cook's distance plot makes it look like case numbers 171, 230 and 498
# could be influential outliers. Some of this is consistent with the
# histogram of the data (see above).
summary(fit.lm)
## 
## Call:
## lm(formula = dw ~ max_length, data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.29147 -0.05080 -0.00870  0.02743  0.87255 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.0969956  0.0089384  -10.85   <2e-16 ***
## max_length   0.0065732  0.0001522   43.19   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1057 on 831 degrees of freedom
## Multiple R-squared:  0.6918, Adjusted R-squared:  0.6914 
## F-statistic:  1865 on 1 and 831 DF,  p-value: < 2.2e-16
AIC(fit.lm); BIC(fit.lm)
## [1] -1375.169
## [1] -1360.994
# looks like max length is significantly different than 0
# Has an adjusted r2 of 69%... pretty good for ecology. Model is significant, coefficients for the model are not equal to zero (p value =~0)
library(e1071)
skewness(dat$dw)
## [1] 1.577212
skewness(dat$max_length)
## [1] 0.6910809
# the response variable is skewed.... highly skewed

e <- residuals(fit.lm)
shapiro.test(e)
## 
##  Shapiro-Wilk normality test
## 
## data:  e
## W = 0.82894, p-value < 2.2e-16
# Number of observations in the modelling data.
n <- nrow(dat)

# Raw-scale scatter of dw against max_length with the fitted straight line
# and its standard-error band.
ggplot(dat, aes(x = max_length, y = dw)) +
  geom_point() +
  theme_classic() +
  geom_smooth(method = "lm", se = TRUE)
## `geom_smooth()` using formula = 'y ~ x'

# Does a boxcox indicate a transformation is necessary?
library(MASS)
boxcox(fit.lm)
# Zoom in on lambda between 0 and 0.6.
boxcox(fit.lm, lambda = seq(from = 0, to = 0.6, by = .01))
# Umm of like square root?
# Could also try a log



hist(dat$dw^0.5)
# Square-root-transformed response. The original read dat$y, a column that
# does not exist in dat (its columns are dw and max_length), so y.2 came out
# as an empty vector.
y.2 <- dat$dw^(0.5)
# looks way better with a square root transformation, normal distribution
fit2 <- lm(dw^(0.5) ~ max_length, data = dat)
plot(fit2)

# looks a lot better, homoscedasticity is good, qq plot tails are a little weird, still some outliers
summary(fit2)
## 
## Call:
## lm(formula = dw^(0.5) ~ max_length, data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.25974 -0.05257 -0.00637  0.03766  0.48356 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.1283679  0.0073131   17.55   <2e-16 ***
## max_length  0.0064325  0.0001245   51.66   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08651 on 831 degrees of freedom
## Multiple R-squared:  0.7625, Adjusted R-squared:  0.7622 
## F-statistic:  2668 on 1 and 831 DF,  p-value: < 2.2e-16
# r squared is better, model is still significant
plot(dat$max_length, dat$dw^(0.5))

# Plot the square root transformed data
# Build a plotmath label of the form "sqrt(y) = a + b . x, r^2 = ..." from a
# fitted linear model.
#
# dat: not used by the function; kept so existing calls of the form
#      lm_eqn(dat) keep working.
# m:   fitted lm whose intercept, slope and R-squared are written into the
#      label. Defaults to fit2, the model the original body hard-coded, so
#      lm_eqn(dat) returns the same label as before.
#
# Returns a length-one character string that can be handed to a text layer
# with parse = TRUE.
lm_eqn <- function(dat, m = fit2) {
  eq <- substitute(
    italic(sqrt(y)) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2,
    list(
      a = format(unname(coef(m)[1]), digits = 2),
      b = format(unname(coef(m)[2]), digits = 2),
      r2 = format(summary(m)$r.squared, digits = 3)
    )
  )
  as.character(as.expression(eq))
}
# sqrt(dw) against max_length with the fitted line and its 95% band.
# The equation label is a single annotation: geom_text() with a constant
# label would draw one copy per row of dat and rely on check_overlap to hide
# all but the first.
ggplot(dat, aes(x = max_length, y = dw^(0.5))) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE, level = 0.95) +
  annotate("text", x = 25, y = 0.8, label = lm_eqn(dat), parse = TRUE) +
  theme_classic()
## `geom_smooth()` using formula = 'y ~ x'

# Compare to a log-transformed model (easier to understand)

# Log of the response only; max_length stays on its original scale.
fit3 <- lm(log(dw) ~ max_length, data = dat)
# Default diagnostic plots for the fitted model.
plot(fit3)

summary(fit3)
## 
## Call:
## lm(formula = log(dw) ~ max_length, data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.49183 -0.22835  0.04015  0.25637  1.41016 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -3.1842390  0.0365173  -87.20   <2e-16 ***
## max_length   0.0286471  0.0006218   46.07   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.432 on 831 degrees of freedom
## Multiple R-squared:  0.7186, Adjusted R-squared:  0.7183 
## F-statistic:  2122 on 1 and 831 DF,  p-value: < 2.2e-16
# Distribution of the log-transformed response.
hist(log(dat$dw))

# log(dw) against untransformed max_length.
plot(dat$max_length, log(dat$dw))

# log-log transformation
# Fitted on eel_sub2 (columns shoot_dw / max_length) rather than dat; this is
# the model used further down to predict the 2019 biomass.
lm2 <- lm(log(shoot_dw) ~ log(max_length), data = eel_sub2)
summary(lm2)
## 
## Call:
## lm(formula = log(shoot_dw) ~ log(max_length), data = eel_sub2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.13653 -0.22248  0.00754  0.18710  1.30992 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -7.67725    0.09434  -81.38   <2e-16 ***
## log(max_length)  1.55564    0.02416   64.38   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3328 on 831 degrees of freedom
## Multiple R-squared:  0.833,  Adjusted R-squared:  0.8328 
## F-statistic:  4144 on 1 and 831 DF,  p-value: < 2.2e-16
# Lay the four diagnostic plots out in a 2 x 2 grid.
par(mfrow=c(2,2))

plot(lm2, which = 1:4) # some issues but overall looks really nice

# plot the log-log transformed data with linear model 
# Build a plotmath label of the form "log(y) = a + b . log(x), r^2 = ..."
# from a fitted linear model.
#
# dat: not used by the function; kept so existing calls of the form
#      lm_eqn(dat) keep working.
# m:   fitted lm whose intercept, slope and R-squared are written into the
#      label. Defaults to lm2, the model the original body hard-coded, so
#      lm_eqn(dat) returns the same label as before.
#
# Returns a length-one character string that can be handed to a text layer
# with parse = TRUE.
lm_eqn <- function(dat, m = lm2) {
  eq <- substitute(
    italic(log(y)) == a + b %.% italic(log(x))*","~~italic(r)^2~"="~r2,
    list(
      a = format(unname(coef(m)[1]), digits = 2),
      b = format(unname(coef(m)[2]), digits = 2),
      r2 = format(summary(m)$r.squared, digits = 3)
    )
  )
  as.character(as.expression(eq))
}
# log(dw) against log(max_length) with the fitted line and its 95% band.
# The equation label is a single annotation: geom_text() with a constant
# label would draw one copy per row of dat and rely on check_overlap to hide
# all but the first.
ggplot(dat, aes(x = log(max_length), y = log(dw))) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE, level = 0.95) +
  annotate("text", x = 2.5, y = -0.5, label = lm_eqn(dat), parse = TRUE) +
  theme_classic()
## `geom_smooth()` using formula = 'y ~ x'

# visreg plots of the log-log model, drawn with ggplot2 (gg = TRUE).
visreg(lm2, gg = TRUE)

# NOTE(review): lm2 takes the log of the response inside its formula; check
# whether scale = "response" back-transforms in that case or whether
# trans = exp is what is wanted here.
visreg(lm2, gg = TRUE, scale = "response")

# Histogram of the lm2 residuals.
ggplot(data = eel_sub2, aes(lm2$residuals)) +
  geom_histogram(color = "black", fill = "purple4") +
  theme(
    panel.background = element_rect(fill = "white"),
    axis.line.x = element_line(),
    axis.line.y = element_line()
  ) +
  ggtitle("Histogram for Model Residuals")
# Those residuals are NICE: pretty symmetrical around 0 -- the model fits the
# data pretty well.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Log-log scatter with the fitted straight line, on a white panel with
# visible axis lines.
ggplot(data = dat, aes(x = log(max_length), y = log(dw))) +
  geom_point() +
  stat_smooth(method = "lm", col = "dodgerblue3") +
  theme(
    panel.background = element_rect(fill = "white"),
    axis.line.x = element_line(),
    axis.line.y = element_line()
  ) +
  ggtitle("Linear Model Fitted to Data")
## `geom_smooth()` using formula = 'y ~ x'

## Before July

Which linear model fits better? We already know that the log-log relationship fits best, so let's fit it to the before-July data and then compare the two models.

# Before-July counterpart of dat: dw and max_length from eel_bJuly2, with
# incomplete rows removed.
dat2 <- na.omit(
  data.frame(
    dw = eel_bJuly2$shoot_dw,
    max_length = eel_bJuly2$max_length
  )
)

# Log-log model fitted to the before-July shoots only.
lm4 <- lm(log(dw) ~ log(max_length), data = dat2)
summary(lm4)
## 
## Call:
## lm(formula = log(dw) ~ log(max_length), data = dat2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.0858 -0.2056  0.0128  0.1736  1.2739 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -7.78423    0.11222  -69.37   <2e-16 ***
## log(max_length)  1.57549    0.02987   52.74   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3237 on 511 degrees of freedom
## Multiple R-squared:  0.8448, Adjusted R-squared:  0.8445 
## F-statistic:  2781 on 1 and 511 DF,  p-value: < 2.2e-16

Use selected linear model to predict 2019 biomass data

Import 2019 data

The 2019 data are set up as a three-sheet Excel spreadsheet. quadrat_ID in hab_qrt is unique for every quadrat that was done in summer 2019. Use quadrat_ID to connect to the other sheets (hab_lng and hab_wgt). hab_lng has the individual lengths of all measured plants, in mm (for eelgrass, 15 blades from each quadrat/site; for kelp, up to 3 of each species collected from the site). hab_wgt has the biomass weights for individual species, by bag.

For this purpose we are interested only in the length data, but we need the quadrat sheet to make sure we are looking only at the eelgrass sites. The newest version of the data is RAW_10-22-19.

# Quadrat-level sheet.
hab_qrt <- read.csv(
  url("https://knb.ecoinformatics.org/knb/d1/mn/v2/object/urn%3Auuid%3A33a4f3b6-ad30-494d-a0bf-861fe86d729e"),
  stringsAsFactors = FALSE,
  header = TRUE
)

# Individual plant lengths.
hab_lng <- read.csv(
  url("https://knb.ecoinformatics.org/knb/d1/mn/v2/object/urn%3Auuid%3Af7c43f66-6800-49b0-9b3d-43e8e39abcd0"),
  stringsAsFactors = FALSE,
  header = TRUE
)

#hab_wgt <- read.csv(url("https://knb.ecoinformatics.org/knb/d1/mn/v2/object/urn%3Auuid%3A3b4ab5b8-c023-486f-8a89-7464b9bccf7a"))

# Site lookup table; joined to hab_qrt on site_2019 and habitat below.
loc <- read.csv(
  url("https://knb.ecoinformatics.org/knb/d1/mn/v2/object/urn%3Auuid%3Ac9c99ce9-fbdd-4879-a2c9-c90448cdba7b"),
  stringsAsFactors = FALSE,
  header = TRUE
)

Data cleaning

# Rewrite the site names in the quadrat data so they can be joined to loc
# (matched on loc$site_2019 below). Names not listed are left unchanged.
hab_qrt$site <- local({
  # old name = new name
  # NOTE(review): "Kaguk " and "Baker Island - kelp " end in a space, exactly
  # as in the original code -- presumably to match loc$site_2019; verify.
  renames <- c(
    "Goats mouth inlet" = "Goat Mouth Inlet",
    "Naukati Bay" = "Naukati",
    "South Wadleigh Island" = "South Wadleigh",
    "Guktu Bay" = "Guktu",
    "North Fish Egg Island" = "North Fish Egg",
    "North Fish Egg - kelp" = "North Fish Egg-Kelp",
    "Natzuhini Bay" = "Natzuhini",
    "Kaguk Cove" = "Kaguk ",
    "Farallon Bay" = "Farallon",
    "Chusini Cove" = "Chusini-Kladein Flat",
    "South Fish Egg Island" = "South Fish Egg",
    "Baker Island - kelp" = "Baker Island - kelp "
  )
  site <- as.character(hab_qrt$site)
  hit <- site %in% names(renames)
  site[hit] <- unname(renames[site[hit]])
  site
})

# Attach the site lookup, drop the lookup columns that are not needed, and
# keep the eelgrass quadrats only.
hab_qrt <- hab_qrt %>%
  left_join(loc, by = c("site" = "site_2019", "habitat" = "habitat")) %>%
  dplyr::select(
    -c(
      siteID_NOAA, site_2017, site_2018, place_name, study, latitude,
      longitude, freshwater, sediment_description, general_description
    )
  ) %>%
  filter(habitat == "eelgrass")

We want only the eelgrass sites and need to convert the length measurements (in mm) to cm

glimpse(hab_qrt)
## Rows: 100
## Columns: 13
## $ X                <int> 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 2…
## $ quadrat_ID       <int> 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 2…
## $ site             <chr> "Shakan - eel", "Shakan - eel", "Shakan - eel", "Shak…
## $ date             <chr> "2019-06-07", "2019-06-07", "2019-06-07", "2019-06-07…
## $ YYYYMMDD         <int> 20190607, 20190607, 20190607, 20190607, 20190607, 201…
## $ habitat          <chr> "eelgrass", "eelgrass", "eelgrass", "eelgrass", "eelg…
## $ quadrat          <int> 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3,…
## $ total_biomass    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ density          <int> 29, 18, 39, 28, 48, 138, 246, 309, 255, 156, 71, 95, …
## $ flowering_shoots <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ notes            <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ bay_code         <chr> "CALD", "CALD", "CALD", "CALD", "CALD", "BAKE", "BAKE…
## $ bay_sample       <chr> "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A"…
# Want to only extract the quadrat numbers that were used to sample eelgrass sites
# paste() turns the IDs into character strings; rows whose habitat is not
# "eelgrass" become NA and are then removed by na.omit().
eel_qrts <- na.omit(ifelse(hab_qrt$habitat == "eelgrass", paste(hab_qrt$quadrat_ID), NA))
# subset the length data by quadrat at an eelgrass site
lng_sub <- subset(hab_lng, quadrat_ID %in% eel_qrts)
levels(as.factor(lng_sub$species)) # only Z. marina, awesome! 
## [1] "Zostera marina"
# Add the length divided by 10 as length_cm (mm to cm, per the text above).
lng_sub <- lng_sub %>%
  mutate(length_cm = length/10)

Predict based on linear models

all data (lm2)

summary(lm2)
## 
## Call:
## lm(formula = log(shoot_dw) ~ log(max_length), data = eel_sub2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.13653 -0.22248  0.00754  0.18710  1.30992 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -7.67725    0.09434  -81.38   <2e-16 ***
## log(max_length)  1.55564    0.02416   64.38   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3328 on 831 degrees of freedom
## Multiple R-squared:  0.833,  Adjusted R-squared:  0.8328 
## F-statistic:  4144 on 1 and 831 DF,  p-value: < 2.2e-16
# formula :
#   log (y) = beta0 + beta1 * log (x)
#   log (dw) = -7.68 + 1.56 * log (max_length)
# create a new dataframe with just the leaf lengths from 2019
# The column is named max_length because that is the predictor name in lm2.
newx <- data.frame(max_length = as.numeric(lng_sub$length_cm))
# use the log-log linear model (equation above) to calculate the dry mass of the shoot with 95% CI
# make sure to exp() the values because it's a log-log relationship
# NOTE(review): exp() of a log-scale fit back-transforms to the median
# (geometric mean) of dw rather than its arithmetic mean; if mean biomass is
# the target, a bias correction may be needed -- confirm.
# NOTE(review): interval = "confidence" describes uncertainty in the fitted
# mean, not the spread of individual shoots (that would be "prediction").
pr.lm <- exp(predict(lm2, newdata = newx, interval = "confidence", level = 0.95))

# graph these new predicted values
# create data.frame first
# cbind() adds the fit / lwr / upr columns next to the 2019 length records.
newdata <- cbind(lng_sub, pr.lm)
ggplot(newdata, aes(x = length_cm, y = fit)) + geom_point() + theme_classic() + geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'

Estimate total biomass (lm2)

str(newdata)
## 'data.frame':    1503 obs. of  9 variables:
##  $ X         : int  27 28 29 30 31 32 33 34 35 36 ...
##  $ plant_ID  : int  27 28 29 30 31 32 33 34 35 36 ...
##  $ quadrat_ID: int  6 6 6 6 6 6 6 6 6 6 ...
##  $ species   : chr  "Zostera marina" "Zostera marina" "Zostera marina" "Zostera marina" ...
##  $ length    : int  579 835 375 334 691 323 581 418 852 544 ...
##  $ length_cm : num  57.9 83.5 37.5 33.4 69.1 32.3 58.1 41.8 85.2 54.4 ...
##  $ fit       : num  0.256 0.452 0.13 0.109 0.337 ...
##  $ lwr       : num  0.25 0.437 0.127 0.106 0.327 ...
##  $ upr       : num  0.262 0.468 0.134 0.112 0.346 ...
str(hab_qrt)
## 'data.frame':    100 obs. of  13 variables:
##  $ X               : int  6 7 8 9 10 11 12 13 14 15 ...
##  $ quadrat_ID      : int  6 7 8 9 10 11 12 13 14 15 ...
##  $ site            : chr  "Shakan - eel" "Shakan - eel" "Shakan - eel" "Shakan - eel" ...
##  $ date            : chr  "2019-06-07" "2019-06-07" "2019-06-07" "2019-06-07" ...
##  $ YYYYMMDD        : int  20190607 20190607 20190607 20190607 20190607 20190618 20190618 20190618 20190618 20190618 ...
##  $ habitat         : chr  "eelgrass" "eelgrass" "eelgrass" "eelgrass" ...
##  $ quadrat         : int  1 2 3 4 5 1 2 3 4 5 ...
##  $ total_biomass   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ density         : int  29 18 39 28 48 138 246 309 255 156 ...
##  $ flowering_shoots: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ notes           : chr  NA NA NA NA ...
##  $ bay_code        : chr  "CALD" "CALD" "CALD" "CALD" ...
##  $ bay_sample      : chr  "A" "A" "A" "A" ...
hab_qrt$density <- as.numeric(hab_qrt$density)
hab_qrt$flowering_shoots <- as.numeric(hab_qrt$flowering_shoots)
# Per-quadrat shoot and flowering-shoot counts. The *_0.25msq columns keep the
# counts as recorded; the *_m2 columns are those counts multiplied by 4.
dsty <- hab_qrt %>%
  filter(habitat == 'eelgrass') %>%
  mutate(density_m2 = (density)*4, density_0.25msq = density) %>%
  mutate(flowering_m2 = (flowering_shoots)*4, flowering_0.25msq = flowering_shoots)
dsty <- dplyr::select(dsty, quadrat_ID, site, date, YYYYMMDD,
                      habitat, quadrat, total_biomass, density_m2, flowering_m2,
                      bay_code, bay_sample, notes, density_0.25msq, flowering_0.25msq)

# calculate average biomass for 15 shoots for each quadrat
# library() stops with an error if dplyr is missing; require() would only
# return FALSE and let the script fail later with a less obvious message.
library(dplyr)
# avg_biomass is the mean of the predicted dry weights (fit) within each
# quadrat_ID; df keeps one row per measured plant.
df <- newdata %>%
  group_by(quadrat_ID) %>%
  mutate(avg_biomass = mean(fit))
# Collapse to one row per quadrat (still grouped by quadrat_ID, as before).
df1 <- df %>%
  dplyr::select(quadrat_ID, avg_biomass) %>%
  distinct()

# need to add back in site information

df2 <- left_join(df1, dsty, by = "quadrat_ID")
# Add the X column of hab_qrt. Columns are picked by name (X and quadrat_ID
# are the first two columns of hab_qrt) so the join does not depend on column
# order.
df3 <- left_join(df2, hab_qrt[, c("X", "quadrat_ID")], by = "quadrat_ID")
#write.csv(df3, "../APECS Master repository/APECS Master repo/ALL_DATA/seagrass_biomass_conversions_baycodes.csv") 

Before July

all data (lm4)

# coefficients and fit of the log-log model (lm4) used for the predictions below
summary(lm4)
## 
## Call:
## lm(formula = log(dw) ~ log(max_length), data = dat2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.0858 -0.2056  0.0128  0.1736  1.2739 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -7.78423    0.11222  -69.37   <2e-16 ***
## log(max_length)  1.57549    0.02987   52.74   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3237 on 511 degrees of freedom
## Multiple R-squared:  0.8448, Adjusted R-squared:  0.8445 
## F-statistic:  2781 on 1 and 511 DF,  p-value: < 2.2e-16
# formula:
#   log(y) = beta0 + beta1 * log(x)
#   log(dw) = -7.78 + 1.58 * log(max_length)
# Predictor table for 2019: the measured leaf lengths, stored under the
# variable name the model was fitted on (max_length)
newx_lm4 <- data.frame(max_length = as.numeric(lng_sub[["length_cm"]]))
# Predict shoot dry mass from the log-log linear model (equation above) with a
# 95% confidence interval; the predictions come back on the log scale, so
# exp() is applied because it's a log-log relationship
# NOTE(review): interval = "confidence" describes uncertainty in the fitted
# mean rather than the spread of individual shoots, and exp() of a log-scale
# prediction is not the arithmetic mean on the original scale -- confirm this
# is the intended quantity before it is scaled up to quadrat biomass
pr.lm4 <- exp(
  predict(
    lm4,
    newdata = newx_lm4,
    level = 0.95,
    interval = "confidence"
  )
)

# Bind the predictions (fit, lwr, upr) onto the 2019 shoot table, then plot
# predicted biomass against leaf length
newdata_lm4 <- cbind(lng_sub, pr.lm4)
ggplot(newdata_lm4, aes(x = length_cm, y = fit)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_classic()
## `geom_smooth()` using formula = 'y ~ x'

Estimate total biomass (lm4)

# check that the 2019 shoot table now carries the predicted fit/lwr/upr columns
str(newdata_lm4)
## 'data.frame':    1503 obs. of  9 variables:
##  $ X         : int  27 28 29 30 31 32 33 34 35 36 ...
##  $ plant_ID  : int  27 28 29 30 31 32 33 34 35 36 ...
##  $ quadrat_ID: int  6 6 6 6 6 6 6 6 6 6 ...
##  $ species   : chr  "Zostera marina" "Zostera marina" "Zostera marina" "Zostera marina" ...
##  $ length    : int  579 835 375 334 691 323 581 418 852 544 ...
##  $ length_cm : num  57.9 83.5 37.5 33.4 69.1 32.3 58.1 41.8 85.2 54.4 ...
##  $ fit       : num  0.249 0.444 0.126 0.105 0.329 ...
##  $ lwr       : num  0.241 0.422 0.122 0.102 0.316 ...
##  $ upr       : num  0.258 0.466 0.129 0.108 0.343 ...
# re-inspect the quadrat table before rebuilding the density lookup
str(hab_qrt)
## 'data.frame':    100 obs. of  13 variables:
##  $ X               : int  6 7 8 9 10 11 12 13 14 15 ...
##  $ quadrat_ID      : int  6 7 8 9 10 11 12 13 14 15 ...
##  $ site            : chr  "Shakan - eel" "Shakan - eel" "Shakan - eel" "Shakan - eel" ...
##  $ date            : chr  "2019-06-07" "2019-06-07" "2019-06-07" "2019-06-07" ...
##  $ YYYYMMDD        : int  20190607 20190607 20190607 20190607 20190607 20190618 20190618 20190618 20190618 20190618 ...
##  $ habitat         : chr  "eelgrass" "eelgrass" "eelgrass" "eelgrass" ...
##  $ quadrat         : int  1 2 3 4 5 1 2 3 4 5 ...
##  $ total_biomass   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ density         : num  29 18 39 28 48 138 246 309 255 156 ...
##  $ flowering_shoots: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ notes           : chr  NA NA NA NA ...
##  $ bay_code        : chr  "CALD" "CALD" "CALD" "CALD" ...
##  $ bay_sample      : chr  "A" "A" "A" "A" ...
# store the shoot counts as doubles before they are rescaled
hab_qrt[["density"]] <- as.numeric(hab_qrt[["density"]])
hab_qrt[["flowering_shoots"]] <- as.numeric(hab_qrt[["flowering_shoots"]])
# Keep only the quadrats recorded in eelgrass habitat, multiply the counts by 4
# to get the *_m2 columns, and retain just the columns needed for the joins
dsty <- hab_qrt %>%
  filter(habitat == "eelgrass") %>%
  mutate(
    density_m2 = density * 4,
    flowering_m2 = flowering_shoots * 4
  ) %>%
  dplyr::select(
    quadrat_ID, density, flowering_shoots, bay_code,
    bay_sample, density_m2, flowering_m2
  )
#dsty <- dsty[,c(1,8,9,11,12)]

# calculate average biomass for 15 shoots for each quadrat
# library() stops with an error if dplyr is missing; require() would only
# return FALSE and let the pipeline fail later with a less helpful message
library(dplyr)
# df_lm4 keeps one row per shoot, with the quadrat mean of the lm4-predicted
# biomass (`fit`) repeated on every row of that quadrat
df_lm4 <- newdata_lm4 %>%
  group_by(quadrat_ID) %>%
  mutate(avg_biomass = mean(fit)) %>%
  # drop the grouping so it does not leak into the joins and mutates below
  ungroup()
# df1_lm4 collapses df_lm4 to one row per quadrat: quadrat_ID and mean biomass
df1_lm4 <- df_lm4 %>%
  dplyr::select(quadrat_ID, avg_biomass) %>%
  distinct()

# Add the quadrat-level information back in: first the shoot counts and bay
# codes from dsty, then the site name from hab_qrt
df2_lm4 <- df1_lm4 %>%
  left_join(dsty, by = "quadrat_ID")
df3_lm4 <- df2_lm4 %>%
  left_join(hab_qrt[, c("quadrat_ID", "site")], by = "quadrat_ID")
#write.csv(df3_lm4, "../ALL_DATA/seagrass_biomass_conversions.csv") 

Look at variation in biomass between years by site

2017

# 2017 density data, read directly from the KNB data repository
eel17 <- read.csv(
  file = url("https://knb.ecoinformatics.org/knb/d1/mn/v2/object/urn%3Auuid%3A5e946e41-4f5f-4499-9969-766f01113971"),
  header = TRUE,
  stringsAsFactors = FALSE
)

# Per-shoot 2017 table with the mean of shoot_dw for each site x quadrat
# repeated on every row of that quadrat. The result stays grouped by
# site and quadrat.
eel_biom17 <- eel_sub %>%
  group_by(site, quadrat) %>%
  mutate(avg_biom_per_quad = mean(shoot_dw)) %>%
  dplyr::select(
    site:plant,
    avg_biom_per_quad,
    mean_length,
    max_length,
    shoot_dw,
    rhi_length:notes
  )

# Quadrat-level 2017 biomass: reduce the shoot table to one row per quadrat,
# attach the density counts, and scale mean shoot biomass by shoot number
eel_biom_dens17 <- eel_biom17 %>%
  # drop the per-shoot identifier so distinct() leaves one row per quadrat
  dplyr::select(site:avg_biom_per_quad, -plant) %>%
  distinct() %>%
  left_join(eel17, by = c("site", "quadrat")) %>%
  mutate(
    # vegetative + flowering shoots counted in the quadrat
    total_shoots_0.25msq = eelgrass_shoots_0.25msq + flowering_shoots_0.25msq,
    shoots_1msq = total_shoots_0.25msq * 4
  ) %>%
  dplyr::select(
    site:YYYYMMDD.y,
    depth_m,
    eelgrass_shoots_0.25msq:shoots_1msq,
    -notes
  ) %>%
  mutate(
    # mean biomass per shoot x number of shoots, then x4
    biom_per_quad_0.25msq = avg_biom_per_quad * total_shoots_0.25msq,
    biom_msq = biom_per_quad_0.25msq * 4
  )

# Boxplot of 2017 biom_msq by site, with the site labels rotated to vertical
biomass_var_2017 <- ggplot(eel_biom_dens17, aes(x = site, y = biom_msq)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

biomass_var_2017
## Warning: Removed 4 rows containing non-finite values (`stat_boxplot()`).

# write.csv(eel_biom_dens17, "eelgrass_biom_density_2017.csv")
# 2019 biom_msq from the lm4 predictions: mean predicted shoot biomass times
# density_m2, shown as a boxplot by site
biomass_var_2019 <- df3_lm4 %>%
  mutate(biom_msq = avg_biomass * density_m2) %>%
  ggplot(aes(x = site, y = biom_msq)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

# Same boxplot built from df3 (the quadrat means computed from `newdata`)
biomass_var_2019_alldat <- df3 %>%
  mutate(biom_msq = avg_biomass * density_m2) %>%
  ggplot(aes(x = site, y = biom_msq)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

# show the two 2019 estimates side by side
plot_grid(
  biomass_var_2019,
  biomass_var_2019_alldat,
  nrow = 1,
  ncol = 2
)

compare 2017 and 2019 biomass

# show 2017 next to the 2019 estimate built from df3
plot_grid(
  biomass_var_2017,
  biomass_var_2019_alldat,
  nrow = 1,
  ncol = 2
)
## Warning: Removed 4 rows containing non-finite values (`stat_boxplot()`).